# checking hukou balance across parade period as suggested by reviewer

# Start from a clean slate: remove every object that ls() lists in the
# current workspace before anything else runs.
rm(list = ls())

# Packages attached for this analysis, in the order listed.
# NOTE(review): attach order decides which package's function wins when two
# packages export the same name (e.g. plyr vs dplyr) -- keep the order unless
# the masking has been checked.
pkg <- c("plyr", "dplyr", "tidyr", 
         "MASS", "multiwayvcov", "fracdiff",
         "fractal",
         "lme4", "ArfimaMLM","gam",
         "forecast",
         "stargazer", "lmtest", "doBy", "ggplot2", 
         "mediation", "vars", "DataCombine", "foreign",
         "mgcv", "ordinal", "reshape2", 
         "xtable", "sandwich", "rms")
# require() returns FALSE instead of stopping when a package is missing, so
# the list of TRUE/FALSE values produced here is the only sign of a failed
# attach -- inspect it.
lapply(pkg, require, character.only = TRUE)

# Large scipen penalty so numbers print in fixed rather than scientific ("E")
# notation.
options(scipen=999)
### Set the working directory to the project's Data folder so that relative
### input paths resolve there.
setwd("~/Dropbox/Pollution and Public Perceptions in China/Data and Analysis/Data")

# Restore the objects saved in the file "ppp_cleaned" into the workspace.
# NOTE(review): presumably this is what provides the data frame `p2` used by
# the rest of the script -- confirm.
load("ppp_cleaned")

######### T-test for balance of HUKOU
# Two-sample t-test comparing mean `hukou` between observations with
# parade == 0 (first sample) and parade == 1 (second sample). t.test() uses
# the Welch (unequal-variance) form by default.
t.test(p2$hukou[p2$parade == 0], p2$hukou[p2$parade == 1])

# Scratch conversion of two proportions to percentages.
# NOTE(review): these look like the two group means copied by hand from an
# earlier run of the t-test above -- they are hard-coded, so re-check them
# whenever the data change.
0.9256449 * 100  # ~92.6%
0.9341564 * 100  # ~93.4% (was the bare literal 9341564, missing ".…*100")

######## zip and city
# List every column available in p2.
colnames(p2)

# Columns to keep for the city / zip check.
p.vars <- c("city", "zip")

# Select those columns by name. The column index goes AFTER the comma:
# `p2[p.vars, ]` would look up *rows* named "city" and "zip" (yielding NA
# rows with all columns) rather than the two columns wanted here.
# drop = FALSE keeps the result a data frame regardless of how many columns
# are requested.
p3 <- p2[, p.vars, drop = FALSE]

# Confirm that only the requested columns remain.
colnames(p3)

